state <- "ma"
priors_versions <- c("v1", "v2", "v3", "v4")
versions <- tibble(
version = c("v1", "v2", "v3", "v4"),
vlabel = factor(
c( "$Priors\\,Do\\,Not\\,Vary\\,by\\,County\\,and\\,Date$",
"$\\beta$ Centered at Emp. Value",
"$P(S_1|untested)$ and $\\beta$ Centered at Emp. Values",
"$P(S_1|untested)$ Centered at Emp. Value"),
levels = c(
"$Priors\\,Do\\,Not\\,Vary\\,by\\,County\\,and\\,Date$",
"$\\beta$ Centered at Emp. Value",
"$P(S_1|untested)$ Centered at Emp. Value",
"$P(S_1|untested)$ and $\\beta$ Centered at Emp. Values"
) )
)
state_corrected_path <- here("analysis/results/adj_biweekly_county/", state, "/")
################################
# read fips
################################
fips <- read_tsv(here::here("data/demographic/county_fips.tsv")) %>%
rename_with(tolower)
Ranking Ratio of Estimated to Observed
subset <- corrected %>%
filter(version=="v2")
ranks <- subset %>%
mutate(ratio = exp_cases_median/positive,
tested = total/population) %>%
# one observation per biweek per state
select(biweek, date, ratio, county_name,tested) %>%
group_by(biweek,ratio,county_name,tested) %>%
summarize(date=min(date)) %>%
# rank for each time interval
group_by(biweek) %>%
mutate(rank = rank(ratio),
rank_tested=rank(-tested))
## `summarise()` has grouped output by 'biweek', 'ratio', 'county_name'. You can
## override using the `.groups` argument.
ranks %>%
ungroup() %>%
ggplot(aes(x=date, y = ratio, color = county_name,
group=county_name)) +
geom_point() +
geom_line()

ranks %>%
ungroup() %>%
ggplot(aes(x=date, y = rank, color = county_name,
group=county_name)) +
geom_point() +
geom_line()

ranks %>%
ungroup() %>%
ggplot(aes(x=rank_tested, y = rank, color = county_name,
group=county_name)) +
geom_point()

r <- ranks %>%
mutate(rank_group = case_when(
rank <= 3 ~ "Top 3",
rank > 10 ~ "Bottom 3",
TRUE ~ "Middle" )) %>%
group_by(county_name, rank_group) %>%
mutate(n=n()) %>%
group_by(county_name) %>%
mutate(max_group = rank_group[which.max(n)]) %>%
filter( max(n) >=20) %>%
filter(max_group != "Middle") %>%
ungroup()
end_labs <- r %>%
ungroup() %>%
filter(date==max(date)) %>%
# shift to the right
mutate(date = date + days(10)) %>%
select(county_name, rank, date) %>%
ungroup()
##############################
# RANK PLOT OVER TIME
##############################
#
# set.seed(999)
#
# pal1 <-viridis_pal(option="viridis", end = .9, begin=.3)(3)
# pal2 <- viridis_pal(option="rocket", end = .9, begin=.3)(2)
# pal3 <- viridis_pal(option="magma", end = .9, begin=.3)(2)
# pal4 <- viridis_pal(option="mako", end = .8, begin=.3)(2)
# pal5 <- viridis_pal(option="cividis", end = .9, begin=.1)(3)
#
#
# rankpal <- c(pal1, pal2, pal3,pal4,pal5)
# indexes <- sample.int(length(rankpal), replace=FALSE)
# rankpal <- rankpal[indexes]
set.seed(123)
rankpal <- c("#b4ddd4", "#7b2c31", "#65d04b", "#da239b", "#b7d165", "#453fbc", "#658114", "#af3fe8", "#ffb947", "#0a60a8", "#f87945", "#16d0ae")
rankpal <- c("#851657", "#be3acd", "#19a71f", "#824598", "#ed820a", "#BB8E37", "#974810", "#1f84ec", "#d02023", "#059dc5", "#f23387", "#16d0ae")
indexes <- sample.int(length(rankpal), replace=FALSE)
rankpal <- rankpal[indexes]
r %>%
# filter(rank_group!="Middle") %>%
ggplot() +
geom_point(aes(x=date,y=rank,
group=county_name,
color= county_name),
size=.5) +
geom_line(aes(x=date,y=rank,
group=county_name,
color= county_name)) +
# facet_wrap(~max_group) +
ggrepel::geom_text_repel( aes(x= date-days(4),
y = rank,
color=county_name,
label = county_name),
end_labs,
min.segment.length=2,
nudge_y=0,
hjust=0,
size=7,
direction="y",
force_pull=9,
box.padding=.15) +
theme_c(legend.position="none",
axis.text.x = element_text(size=17,angle=30)) +
theme(plot.subtitle =element_text(size=19, margin=margin(4,0,4,0)),
axis.title.x=element_text(size=16),
axis.title.y=element_text(size=19)) +
# scale_color_brewer(palette="Dark2") +
scale_color_manual(values=rankpal) +
scale_x_date(date_breaks="1 months", date_labels = "%b %Y",
limits = c(ymd("2021-03-01"),
ymd("2022-03-05"))) +
labs(y = "Rank of Ratio",
title = "Rank of Estimated Infections to Observed Infections Over Time",
subtitle = "For Counties Ranked in the Top or Bottom 3\nfor More Than 80% of Time Intervals",
x="") +
scale_y_reverse(breaks=seq(1,13, by=1))

ggsave(here('thesis/figure/rank-ratio-over-time-ma-county.pdf'), height=8,width=13)
Faceted by version
pal <- c("#10BAC5", "#1B10C5", "#EFB719", "#900C3F")
names(pal) <- c(#"$observed$",
'$P(S_1|untested)$ Centered at Emp. Value',
'$P(S_1|untested)$ and $\\beta$ Centered at Emp. Values',
"$Priors\\,Do\\,Not\\,Vary\\,by\\,County\\,and\\,Date$",
"$\\beta$ Centered at Emp. Value"
)
joined %>%
filter(biweek >=6) %>%
group_by(biweek) %>%
mutate(xmin = min(date),
xmax = max(date)) %>%
ungroup() %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub,
fill = vlabel),
alpha = .5,
show.legend=FALSE) +
geom_linerange(aes(xmin = xmin,
xmax=xmax,
y = positive,
color = 'obs')) +
facet_grid(county_name~vlabel, scales = "free_y",
labeller=as_labeller(
latex2exp::TeX, default = label_parsed)) +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "2 months",
date_labels = "%b %Y") +
theme_c(axis.text.x = element_text(angle = 60, size = 16),
plot.title=element_text(face = "bold",
hjust = .5,
size = 30,
margin=margin(5,5,15,5,'pt')),
strip.background = element_rect(fill = "#393939"),
strip.text = element_text(color = "white", size = 16),
strip.text.y = element_text(margin = margin(5,5,5,5,'pt'),
size = 14,
face="bold"),
strip.text.x = element_text(margin = margin(5,3,5,3,'pt'),
face = "bold",
size = 10),
plot.subtitle = ggtext::element_markdown(size = 25,margin = margin(3,5,10,5,'pt'),
face = "italic"),
legend.position = "top",
legend.text =element_text(size = 18)) +
scale_fill_manual(values = pal) +
labs(y = "Estimated Cases",
title = paste0("Estimated Cases by Version in ", toupper(state))) +
scale_color_manual(values = c('obs' = 'red'), labels = 'Positive Tests',
name = '') +
guides(color = guide_legend(override.aes = list(linewidth =2)))

ggsave(here::here(paste0('thesis/figure/', state, '_pb_compared_to_observed.pdf')),
height=19, width = 15, dpi=400)
All versions together
joined %>%
group_by(biweek) %>%
filter(biweek >=6) %>%
mutate(xmin = min(date),
xmax = max(date)) %>%
ungroup() %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub,
fill = vlabel),
alpha = .5,
show.legend=TRUE) +
# geom_linerange(aes(xmin = xmin,
# xmax=xmax,
# y = exp_cases_median,
# color =vlabel)) +
facet_wrap(~county_name, scales = "free_y",
labeller=as_labeller(
latex2exp::TeX, default = label_parsed),
ncol = 3) +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "2 months",
date_labels = "%b %Y") +
theme_c(axis.text.x = element_text(angle = 30, size = 16),
plot.title=element_text(face = "bold",
hjust = .5,
size = 32,
margin=margin(5,5,15,5,'pt')),
strip.background = element_rect(fill = "#393939"),
strip.text = element_text(color = "white", size = 16),
strip.text.y = element_text(margin = margin(5,5,5,5,'pt'),
size = 14,
face="bold"),
strip.text.x = element_text(margin = margin(5,3,5,3,'pt'),
face = "bold",
size = 18),
plot.subtitle = ggtext::element_markdown(size = 25,margin = margin(3,5,10,5,'pt'),
face = "italic"),
legend.position = "top",
legend.text =element_text(size = 20,hjust=0)) +
# scale_color_manual(values = pal, labels =TeX(names(pal))) +
scale_fill_manual(values = pal, labels =TeX(names(pal)),
name ='') +
labs(y = "Estimated Infections",
title = paste0("Estimated Infections by Version in Massachusetts"),
x="") +
guides(fill = guide_legend(byrow=TRUE,nrow=4))

ggsave(here::here(paste0('thesis/figure/', state, '_pb_compare_versions.pdf')),
height=19, width = 18, dpi=400)
Compare to Covidestim
joined %>%
filter(biweek >=6) %>%
# covidestim does not report estimates for Nantucket
filter(!grepl("Nantucket", county_name )) %>%
group_by(biweek) %>%
mutate(xmin = min(date),
xmax = max(date)) %>%
ungroup() %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub,
fill = vlabel),
alpha = .5,
show.legend=FALSE) +
geom_linerange(aes(xmin = xmin,
xmax=xmax,
y = infections,
color = 'covidestim')) +
facet_grid(county_name~vlabel, scales = "free_y",
labeller=as_labeller(
latex2exp::TeX, default = label_parsed)) +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "2 months",
date_labels = "%b %Y") +
theme_c(axis.text.x = element_text(angle = 60, size = 16),
plot.title=element_text(face = "bold",
hjust = .5,
size = 30,
margin=margin(5,5,15,5,'pt')),
strip.background = element_rect(fill = "#393939"),
strip.text = element_text(color = "white", size = 16),
strip.text.y = element_text(margin = margin(5,5,5,5,'pt'),
size = 14,
face="bold"),
strip.text.x = element_text(margin = margin(5,3,5,3,'pt'),
face = "bold",
size = 10),
plot.subtitle = ggtext::element_markdown(size = 25,
margin = margin(3,5,10,5,'pt'),
face = "italic"),
legend.position = "top",
legend.text =element_text(size = 19)) +
scale_fill_manual(values = pal) +
labs(y = "Estimated Cases",
x="",
title = paste0("Estimated Cases by Version in ", toupper(state))) +
scale_color_manual(values = c('covidestim' = 'darkblue'), labels = 'Covidestim Estimates',
name = '') +
guides(color = guide_legend(override.aes = list(linewidth =2)))

ggsave(here::here(paste0('thesis/figure/', state, '_pb_compared_to_covidestim.pdf')),
height=15, width = 15, dpi=400)
ggsave(here::here(paste0('presentation/figure/', state, '_pb_compared_to_covidestim.jpeg')),
height=19, width = 15, dpi=400)
################################
# figure for presentation
#################################
joined %>%
filter(grepl("Suffolk|Berkshire|Worcester", county_name)) %>%
filter(biweek >=6) %>%
# covidestim does not report estimates for Nantucket
filter(!grepl("Nantucket", county_name )) %>%
group_by(biweek) %>%
mutate(xmin = min(date),
xmax = max(date)) %>%
ungroup() %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub,
fill = vlabel),
alpha = .5,
show.legend=FALSE) +
geom_linerange(aes(xmin = xmin,
xmax=xmax,
y = infections,
color = 'covidestim')) +
facet_grid(county_name~vlabel, scales = "free_y",
labeller=as_labeller(
latex2exp::TeX, default = label_parsed)) +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "2 months",
date_labels = "%b %Y") +
theme_c(axis.text.x = element_text(angle = 40, size = 11),
axis.text.y=element_text(size=8),
plot.title=element_text(face = "bold",
hjust = .5,
size = 20,
margin=margin(5,5,15,5,'pt')),
strip.background = element_rect(fill = "#393939"),
strip.text = element_text(color = "white", size = 16),
strip.text.y = element_text(margin = margin(5,5,5,5,'pt'),
size = 14,
face="bold"),
strip.text.x = element_text(margin = margin(5,3,5,3,'pt'),
face = "bold",
size = 12),
plot.subtitle = ggtext::element_markdown(size = 25,
margin = margin(3,5,10,5,'pt'),
face = "italic"),
legend.position = "top",
legend.text =element_text(size = 14)) +
scale_fill_manual(values = pal) +
labs(x = "",
y = "Estimated Infections",
title = paste0("Estimated Infections by Version in 3 Counties in ", toupper(state))) +
scale_color_manual(values = c('covidestim' = 'darkblue'), labels = 'Covidestim Estimates',
name = '') +
guides(color = guide_legend(override.aes = list(linewidth =2)))

ggsave(here('presentation/figure/ma_pb_compared_to_observed_3_counties.jpeg'),
height=10, width = 16.5, dpi=600)
###########################
# JUST SUFFOLK IN WEEK 10
###########################
suffolk_10_vals <- joined %>%
filter(grepl("Suffolk", county_name)) %>%
filter(biweek==10 & version=="v1") %>%
select(exp_cases_lb,exp_cases_ub ) %>%
distinct()
joined %>%
filter(grepl("Suffolk", county_name)) %>%
filter( version=="v1") %>%
mutate(color = ifelse(biweek ==10, "yes", NA)) %>%
filter(biweek >= 8 & biweek <=22) %>%
# covidestim does not report estimates for Nantucket
filter(!grepl("Nantucket", county_name )) %>%
group_by(biweek) %>%
mutate(xmin = min(date),
xmax = max(date)) %>%
ungroup() %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub),
show.legend=FALSE,
fill="#AEC3CC",
alpha=.85) +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub,
fill=color),
alpha = .4,
show.legend=FALSE) +
scale_fill_manual(na.value=NA, values=c("red")) +
geom_linerange(xmin=as_date("2021-05-07"), xmax=as_date("2021-05-20"),
y=suffolk_10_vals$exp_cases_lb,
color = '#DBC37F',
linewidth = 1.5,
alpha=1) +
geom_linerange(xmin=as_date("2021-05-07"), xmax=as_date("2021-05-20"),
y=suffolk_10_vals$exp_cases_ub,
color = '#DBC37F',
linewidth =1.5,
alpha=1) +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "2 months",
date_labels = "%b %Y") +
theme_c(axis.text.x = element_text(angle = 0, size = 5),
axis.text.y=element_text(size=5),
axis.title = element_text(size=7),
plot.title=element_text(face = "bold",
hjust = .5,
size = 20,
margin=margin(5,5,15,5,'pt')),
strip.background = element_rect(fill = "#393939"),
strip.text = element_text(color = "white", size = 16),
strip.text.y = element_text(margin = margin(5,5,5,5,'pt'),
size = 14,
face="bold"),
strip.text.x = element_text(margin = margin(5,3,5,3,'pt'),
face = "bold",
size = 12),
plot.subtitle = ggtext::element_markdown(size = 25,
margin = margin(3,5,10,5,'pt'),
face = "italic"),
legend.position = "top",
legend.text =element_text(size = 14)) +
labs(x = "",
y = "Estimated Infections")

ggsave(here('presentation/figure/suffolk_biweek_10.jpeg'),
height=2, width =3, dpi=400)
joined %>%
filter(grepl("Berkshire", county_name)) %>%
filter( version=="v2") %>%
# mutate(color = ifelse(biweek ==10, "yes", NA)) %>%
# filter(biweek >= 8 & biweek <=22) %>%
# covidestim does not report estimates for Nantucket
filter(!grepl("Nantucket", county_name )) %>%
group_by(biweek) %>%
mutate(xmin = min(date),
xmax = max(date)) %>%
ungroup() %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub),
show.legend=FALSE,
fill="#386A7E",
alpha=.85) +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "2 months",
date_labels = "%b %Y") +
theme_c(axis.text.x = element_text(angle = 0, size = 14),
axis.text.y=element_text(size=7),
axis.title = element_text(size=17),
plot.title=element_text(face = "bold",
hjust = .5,
size = 20,
margin=margin(5,5,15,5,'pt')),
strip.background = element_rect(fill = "#393939"),
strip.text = element_text(color = "white", size = 16),
strip.text.y = element_text(margin = margin(5,5,5,5,'pt'),
size = 14,
face="bold"),
strip.text.x = element_text(margin = margin(5,3,5,3,'pt'),
face = "bold",
size = 12),
plot.subtitle = ggtext::element_markdown(size = 25,
margin = margin(3,5,10,5,'pt'),
face = "italic"),
legend.position = "top",
legend.text =element_text(size = 14)) +
labs(x = "",
y = "Estimated Infections")

ggsave(here('presentation/figure/example_output.jpeg'),
height=6, width =10, dpi=400)
Summarizing Concordance with Covidestim
ma_concordance <- joined %>%
filter(biweek >=6) %>%
# covidestim does not report estimates for Nantucket
filter(!grepl("Nantucket", county_name )) %>%
select(-date) %>%
distinct() %>%
select(exp_cases_lb, exp_cases_ub, county_name, biweek, vlabel, infections) %>%
mutate(in_interval = case_when(
exp_cases_lb <= infections & infections <= exp_cases_ub ~ "In Interval",
exp_cases_lb > infections ~ "Below Interval",
exp_cases_ub < infections ~ "Above Interval"
) ) %>%
group_by(in_interval, county_name, vlabel) %>%
summarize(n_per_county=n()) %>%
group_by(vlabel, in_interval) %>%
summarize(n_per_version = sum(n_per_county)) %>%
group_by(vlabel) %>%
mutate(total = sum(n_per_version)) %>%
ungroup() %>%
mutate(prop=n_per_version/total)
## `summarise()` has grouped output by 'in_interval', 'county_name'. You can
## override using the `.groups` argument.
## `summarise()` has grouped output by 'vlabel'. You can override using the
## `.groups` argument.
#######################################################################
# CONCORDANCE BY ABOVE, BELOW, WITHIN FOR MOST CONCORDANT VERSION
#######################################################################
ma_concordance_by_county <- joined %>%
filter(biweek >=6) %>%
# covidestim does not report estimates for Nantucket
filter(!grepl("Nantucket", county_name )) %>%
select(-date) %>%
mutate(county_name = gsub("$", "", county_name, fixed=TRUE)) %>%
select(exp_cases_lb, exp_cases_ub, county_name, biweek, vlabel, infections) %>%
distinct() %>%
mutate(in_interval = case_when(
exp_cases_lb <= infections & infections <= exp_cases_ub ~ "In Interval",
exp_cases_lb > infections ~ "Below Interval",
exp_cases_ub < infections ~ "Above Interval"
) ) %>%
group_by(in_interval, county_name, vlabel) %>%
summarize(n_per_county=n()) %>%
group_by(county_name,vlabel) %>%
mutate(total = sum(n_per_county),
prop = n_per_county/total) %>%
mutate(state="MA")
## `summarise()` has grouped output by 'in_interval', 'county_name'. You can
## override using the `.groups` argument.
#########################################################
# CONCORDANCE BY COUNTY (NOT VERSION)
#########################################################
best_by_county <- ma_concordance_by_county %>%
group_by(county_name) %>%
filter(in_interval=="In Interval" )%>%
summarize(vlabel = vlabel[which.max(prop)],
m = max(prop))
ma_concordance_by_county %>%
inner_join(best_by_county) %>%
ggplot(aes(x= fct_reorder(county_name,m),
fill = in_interval, y =prop)) +
geom_bar(stat="identity",position="dodge") +
theme_c() +
coord_flip()+
scale_fill_manual(values=c("In Interval" = "#79D2AF",
"Above Interval" = "#D2AF79",
"Below Interval"="#7997D2")) +
labs(x="",
y= "Proportion",
fill = "Covidestim Median:",
title = "Proportion Where Covidestim Median\nWas Within, Above, or Below the Median",
subtitle = "For Implementation with Greatest Concordance") +
theme_c() +
theme(legend.position="right",
legend.title = element_text(face="bold", size = 15),
legend.text= element_text( size = 15),
legend.spacing.y = unit(3, 'pt'),
axis.text.y = element_text(size = 17, hjust=1))
## Joining with `by = join_by(county_name, vlabel)`

ggsave(here('presentation/figure/covidestim_above_below_by_county_ma.jpeg'),height=7, width=12)
ma_concordance_by_county %>%
filter(vlabel=="$P(S_1|untested)$ Centered at Emp. Value")
##############################################
# stacked bar chart of concordance by county
##############################################
ma_concordance_by_county %>%
filter(vlabel=="$P(S_1|untested)$ Centered at Emp. Value") %>%
group_by(county_name) %>%
mutate(m = prop[which(in_interval=="In Interval")]) %>%
ungroup() %>%
mutate(in_interval = factor(in_interval, levels = c("Above Interval",
"In Interval", "Below Interval"))) %>%
ggplot(aes(x= fct_reorder(county_name,m),
fill = (in_interval), y =prop)) +
geom_bar(stat="identity",position="stack") +
theme_c() +
coord_flip()+
scale_fill_manual(values=c("In Interval" = "#79D2AF",
"Above Interval" = "#D2AF79",
"Below Interval"="#7997D2")) +
labs(x="",
y= "Proportion",
fill = "Covidestim Median:",
title = "Massachusetts: Proportion Where Covidestim Median\nWas Within, Above, or Below the Median",
subtitle = TeX("For Implementation with $P(S_1|untested)$ Centered at Survey Value")) +
theme_c() +
theme(legend.position="right",
legend.title = element_text(face="bold", size = 15),
legend.text= element_text( size = 15),
legend.spacing.y = unit(3, 'pt'),
axis.text.y = element_text(size = 17, hjust=1))

ggsave(here('thesis/figure/covidestim_above_below_by_county_ma.pdf'),height=7, width=12)
#########################################################
# CONCORDANCE BY COUNTY (MOST CONC. VERSION)
#########################################################
ma_concordance_by_county %>%
filter(in_interval=="In Interval") %>%
group_by(county_name) %>%
summarize(max_prop =max(prop),
type = vlabel[which.max(prop)]) %>%
ungroup() %>%
ggplot(aes(y = fct_reorder(county_name, max_prop),
fill = type,
x= max_prop)) +
geom_bar(stat="identity",position="dodge", alpha=.8)+
scale_fill_manual(values = pal,labels=TeX(names(pal))) +
theme_c(legend.position="right",
legend.text=element_text(hjust=0, size = 15),
axis.title=element_text(size=16),
axis.text.x=element_text(size=11),
axis.text.y = element_text(size=14)) +
labs(y = "", x="Proportion Containing Covidestim Median",
fill="")

ggsave(here('presentation/figure/best_version_by_county_ma.jpeg'), height=8,width=11)
in_interval <- ma_concordance %>%
filter(in_interval == "In Interval") %>%
mutate(n_interval = n_per_version) %>%
select(vlabel, n_interval)
labels <- ma_concordance %>%
filter(in_interval=="In Interval") %>%
arrange((prop)) %>%
pull(vlabel) %>%
unique() %>%
as.character()
#########################################################
# CONCORDANCE BY VERSION
#########################################################
ma_concordance %>%
left_join(in_interval) %>%
mutate(in_interval=factor(in_interval,
levels=c(
"In Interval",
"Above Interval",
"Below Interval"))) %>%
ggplot(aes(y =fct_reorder(vlabel,n_interval),
x = prop,
fill=in_interval)) +
#facet_wrap(~fct_reorder((vlabel),m), ncol=1) +
geom_bar(stat="identity",position="stack", color="darkgray", linewidth=.2) +
# viridis::scale_fill_viridis(option="mako", begin=.2, discrete=TRUE) +
theme_c(legend.position="right",
legend.title = element_text(face="bold", size = 15),
legend.text= element_text( size = 15),
legend.spacing.y = unit(3, 'pt'),
axis.text.y = element_text(size = 17, hjust=1)) +
theme(
plot.subtitle = element_text(size=18, face='italic', margin=margin(4,0,4,0))) +
scale_y_discrete(labels=unname(TeX(labels))) +
labs(y ="",
x="Proportion",
fill = "Covidestim Median:",
title = "Where Covidestim Medians Fall\nRelative to Probabilistic Bias Intervals",
subtitle = "By Version, in Massachusetts") +
scale_fill_manual(values=c("Below Interval"="#7997D2",
"In Interval" = "#79D2AF",
"Above Interval" = "#D2AF79")) +
guides(fill=guide_legend(byrow=TRUE)) +
scale_x_continuous(n.breaks = 7, expand=c(0,0),limits=c(-.05,1))
## Joining with `by = join_by(vlabel)`

ggsave(here('thesis/figure/summarize-concordance-mass-counties.pdf'), width=16,height=7)
joined %>%
filter(biweek >=6) %>%
# covidestim does not report estimates for Nantucket
filter(!grepl("Nantucket", county_name )) %>%
group_by(biweek) %>%
mutate(xmin = min(date),
xmax = max(date)) %>%
ungroup() %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub,
fill = vlabel),
alpha = .5,
show.legend=FALSE) +
geom_linerange(aes(xmin = xmin,
xmax=xmax,
y = infections,
color = 'covidestim')) +
facet_grid(county_name~vlabel, scales = "free_y",
labeller=as_labeller(
latex2exp::TeX, default = label_parsed)) +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "2 months",
date_labels = "%b %Y") +
theme_c(axis.text.x = element_text(angle = 60, size = 16),
plot.title=element_text(face = "bold",
hjust = .5,
size = 20,
margin=margin(5,5,15,5,'pt')),
strip.background = element_rect(fill = "#393939"),
strip.text = element_text(color = "white", size = 16),
strip.text.y = element_text(margin = margin(5,5,5,5,'pt'),
size = 14,
face="bold"),
strip.text.x = element_text(margin = margin(5,3,5,3,'pt'),
face = "bold",
size = 18),
plot.subtitle = ggtext::element_markdown(size = 25,
margin = margin(3,5,10,5,'pt'),
face = "italic"),
legend.position = "top",
legend.text =element_text(size = 14)) +
scale_fill_manual(values = pal) +
labs(y = "Estimated Infections",
x="",
title = paste0("Estimated Infections by Version in ", toupper(state))) +
scale_color_manual(values = c('covidestim' = 'darkblue'), labels = 'Covidestim Estimates',
name = '') +
guides(color = guide_legend(override.aes = list(linewidth =2)))

# hampshire
county_cols <- viridis(length(unique(joined$county_name))- 1, option = "mako")
county_cols <- c("red",county_cols )
names(county_cols ) <- c("$Hampshire$", unique(joined$county_name)[ unique(joined$county_name) != "$Hampshire$"])
joined %>%
mutate(posrate = positive/total,
size = ifelse(grepl("Hampshire", county_name),'Hampshire', 'not')) %>%
select(biweek, county_name, posrate,size) %>%
distinct() %>%
ggplot(aes(x=biweek,y=posrate, color = county_name,
linewidth=size, alpha = size)) +
geom_line() +
scale_linewidth_manual(values=c('Hampshire'=2, 'not'=.5)) +
scale_alpha_manual(values = c('Hampshire' = 1, 'not'=.4)) +
scale_color_manual(values=county_cols, labels = TeX(names(county_cols))) +
theme_c(legend.position = "right",
legend.title = element_text(size =16, face="bold"),
axis.text.x = element_text(size = 9)) +
guides(linewidth="none",
alpha = "none",
color = guide_legend(override.aes = list(linewidth = 2.5))) +
labs(color = "County Name",
y = "Positivity Rate",
x= "Biweek",
title="Hampshire County Positivity Rate\nCompared to Other Counties in Massachusetts") +
scale_x_continuous(n.breaks = 7)

ggsave(here::here('thesis/figure/hampshire.pdf'),
height=8, width = 14, dpi=400)
ggsave(here::here('presentation/figure/hampshire.pdf'),
height=8, width = 14, dpi=400)
joined %>%
mutate(county_name = gsub("$","", county_name,fixed=TRUE)) %>%
select(-date) %>%
distinct() %>%
mutate(in_interval = infections <= exp_cases_ub & infections >= exp_cases_lb) %>%
filter(!is.na(in_interval)) %>%
group_by(vlabel, in_interval, county_name) %>%
summarize(n=n()) %>%
group_by(county_name, vlabel) %>%
mutate(tot=sum(n),
prop_contained = n/tot) %>%
filter(in_interval) %>%
group_by(county_name) %>%
mutate(m = max(prop_contained)) %>%
ggplot(aes(x=fct_reorder(county_name, m, .desc=TRUE),
y = prop_contained, color = vlabel)) +
geom_point(size = 2.3) +
labs(y = "Proportion of Biweeks Containing Covidestim Estimate",
x = "County",
title = "Comparing the Proportion of Biweeks\nWhere Probabilistic Bias Interval Contains Covidestim Estimate")+
scale_color_manual(values = pal, labels =TeX(names(pal)), name ='') +
theme_c(axis.text.x = element_text(angle = 15, size = 14),
legend.position ="right",
axis.title = element_text(size = 16))
## `summarise()` has grouped output by 'vlabel', 'in_interval'. You can override
## using the `.groups` argument.

ggsave(here::here(paste0('thesis/figure/',
state,
'_pb_compared_to_covidestim_proportions.pdf')),
height=8, width = 15, dpi=400)
#
# joined %>%
# mutate(county_name = gsub("$","", county_name,fixed=TRUE)) %>%
# select(-date) %>%
# distinct() %>%
# mutate(in_interval = infections <= exp_cases_ub & infections >= exp_cases_lb,
# posrate=positive/population) %>%
# group_by(county_name) %>%
# mutate(posrate=mean(posrate)) %>%
# filter(!is.na(in_interval)) %>%
# group_by(vlabel, in_interval, county_name, posrate) %>%
# summarize(n=n()) %>%
# group_by(county_name, vlabel,posrate) %>%
# mutate(tot=sum(n),
# prop_contained = n/tot) %>%
# ggplot(aes(x=posrate, y =prop_contained, color = vlabel)) +
# geom_point()
Michigan
state <- "mi"
state_corrected_path <- here("analysis/results/adj_biweekly_county/", state, "/")
################################
# read fips
################################
fips <- read_tsv(here::here("data/demographic/county_fips.tsv")) %>%
rename_with(tolower)
## Rows: 3232 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): FIPS, Name, State
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
fips <- fips %>%
mutate(fips =ifelse( name %in% c("Dukes", "Nantucket") & state == "MA",
"25007,25019", fips),
name = ifelse(name %in% c("Dukes", "Nantucket") & state == "MA",
"Nantucket Dukes", name)) %>%
distinct() %>%
select(fips,county_name = name)
################################
# ESTIMATED
################################
dates <- readRDS(here("data/date_to_biweek.RDS")) %>%
group_by(biweek)
corrected <- map_df(priors_versions, ~readRDS(
paste0(state_corrected_path, "adj_",
.x,
".RDS")) %>%
mutate(version = .x)) %>%
left_join(dates, relationship = "many-to-many") %>%
inner_join(fips)
## Joining with `by = join_by(biweek)`
## Joining with `by = join_by(fips)`
corrected <- corrected %>%
left_join(versions)
## Joining with `by = join_by(version)`
covidestim <- readRDS(here("data/covidestim/covidestim_biweekly_all_counties.RDS")) %>%
select(-c(date,week)) %>%
distinct()
joined <- corrected %>%
left_join(covidestim, relationship="many-to-many") %>%
left_join(versions)
## Joining with `by = join_by(biweek, fips)`
## Joining with `by = join_by(version, vlabel)`
# for facet_wrap latex formatting
# joined <- joined %>%
# # mutate(county_name = gsub(" ", "~", county_name)) %>%
# mutate(county_name = paste0('$"', county_name,'"$' ))
# ,
# county_name= paste0("$ $", county_name))
#
#
pal <- c("#10BAC5", "#1B10C5", "#EFB719", "#900C3F")
names(pal) <- c(#"$observed$",
'$P(S_1|untested)$ Centered at Emp. Value',
'$P(S_1|untested)$ and $\\beta$ Centered at Emp. Values',
"$Priors\\,Do\\,Not\\,Vary\\,by\\,County\\,and\\,Date$",
"$\\beta$ Centered at Emp. Value"
)
n_counties <- joined$fips %>% unique() %>% length()
size <- ceiling(n_counties/3)
ind <- c(rep(1, size), rep(2, size), rep(3, n_counties - 2*size))
groups <- tibble(group = ind, fips = unique(joined$fips))
#
# name_f <- function(variable, x) {
# if (variable == "county_name") {
# return(as.character(x))
# }
# else {
# plyr::llply( as.character(x), function(x) parse(x))
# }
# }
joined %>%
left_join(groups) %>%
group_by(group) %>%
group_split() %>%
iwalk( ~ {
.x %>%
filter(biweek >=6) %>%
group_by(biweek) %>%
mutate(xmin = min(date),
xmax = max(date)) %>%
ungroup() %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub,
fill = vlabel),
alpha = .5,
show.legend=FALSE) +
geom_linerange(aes(xmin = xmin,
xmax=xmax,
y = positive,
color = 'obs')) +
facet_grid(county_name ~ vlabel,
labeller = labeller(vlabel =as_labeller(TeX, default=label_parsed)),
scales="free_y") +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "2 months",
date_labels = "%b %Y") +
theme_c(axis.text.x = element_text(angle = 30, size = 16),
plot.title=element_text(face = "bold",
hjust = .5,
size = 20,
margin=margin(5,5,15,5,'pt')),
strip.background = element_rect(fill = "#393939"),
strip.text = element_text(color = "white", size = 16),
strip.text.y = element_text(margin = margin(5,5,5,5,'pt'),
size = 14,
face="bold"),
strip.text.x = element_text(margin = margin(5,3,5,3,'pt'),
face = "bold",
size = 10),
axis.text = element_text(size = 8),
plot.subtitle = ggtext::element_markdown(size = 25,
margin = margin(3,5,10,5,'pt'),
face = "italic"),
legend.position = "top",
legend.text =element_text(size = 14)) +
scale_fill_manual(values = pal) +
labs(y = "Estimated Infections",
x="",
title = paste0("Estimated Infections by Version in ", toupper(state))) +
scale_color_manual(values = c('obs' = 'red'), labels = 'Positive Tests',
name = '') +
guides(color = guide_legend(override.aes = list(linewidth =2)))
ggsave(here::here(paste0('thesis/figure/', state, .y, '_pb_compared_to_observed.pdf')),
height=22, width = 15, dpi=400) }
)
## Joining with `by = join_by(fips)`
joined %>%
group_by(biweek) %>%
filter(biweek >=6) %>%
mutate(xmin = min(date),
xmax = max(date)) %>%
ungroup() %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub,
fill = vlabel),
alpha = .5,
show.legend=TRUE) +
# geom_linerange(aes(xmin = xmin,
# xmax=xmax,
# y = exp_cases_median,
# color =vlabel)) +
facet_wrap(~county_name, scales = "free_y",
labeller=labeller(vlabel =as_labeller(TeX, default=label_parsed)),
ncol = 3) +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "2 months",
date_labels = "%b %Y") +
theme_c(axis.text.x = element_text(angle = 60, size = 16),
plot.title=element_text(face = "bold",
hjust = .5,
size = 22,
margin=margin(5,5,15,5,'pt')),
strip.background = element_rect(fill = "#393939"),
strip.text = element_text(color = "white", size = 16),
strip.text.y = element_text(margin = margin(5,5,5,5,'pt'),
size = 14,
face="bold"),
strip.text.x = element_text(margin = margin(5,3,5,3,'pt'),
face = "bold",
size = 10),
plot.subtitle = ggtext::element_markdown(size = 25,margin = margin(3,5,10,5,'pt'),
face = "italic"),
legend.position = "top",
legend.text =element_text(size = 14)) +
# scale_color_manual(values = pal, labels =TeX(names(pal))) +
scale_fill_manual(values = pal, labels =TeX(names(pal)),
name ='') +
labs(y = "Estimated Infections",x="",
title = paste0("Estimated Infections by Version in Michigan"))

ggsave(here::here(paste0('thesis/figure/', state, '_pb_compare_versions.pdf')),
height=17, width = 17, dpi=400)
joined %>%
mutate(county_name = gsub("$","", county_name,fixed=TRUE),
county_name = gsub('"', '', county_name, fixed=TRUE)) %>%
select(-date) %>%
distinct() %>%
mutate(in_interval = infections <= exp_cases_ub & infections >= exp_cases_lb) %>%
filter(!is.na(in_interval)) %>%
group_by(vlabel, in_interval, county_name) %>%
summarize(n=n()) %>%
group_by(county_name, vlabel) %>%
mutate(tot=sum(n),
prop_contained = n/tot) %>%
filter(in_interval) %>%
group_by(county_name) %>%
mutate(m = max(prop_contained)) %>%
ggplot(aes(x=fct_reorder(county_name, m, .desc=TRUE),
y = prop_contained, color = vlabel)) +
geom_point(size = 3.5) +
labs(y = "Proportion of Biweeks Containing Covidestim Estimate",
x = "County",
title = "Comparing the Proportion of Biweeks Where Probabilistic Bias Interval Contains Covidestim Estimate")+
scale_color_manual(values = pal, labels =TeX(names(pal)), name ='') +
theme_c(axis.text.x = element_text( size =16, angle =60, hjust=.95,vjust=.9),
axis.text.y = element_text(size = 12),
legend.text = element_text(size = 28),
axis.title= element_text(size = 25),
legend.position ="top",
plot.title = element_text(size = 38, face="bold", margin = margin(2,0,3,0))) +
scale_y_continuous(n.breaks = 6) +
guides(color = guide_legend(override.aes = list(size = 8), nrow=4,
byrow=TRUE))
## `summarise()` has grouped output by 'vlabel', 'in_interval'. You can override
## using the `.groups` argument.

ggsave(here::here(paste0('thesis/figure/',
state,
'_pb_compared_to_covidestim_proportions.pdf')),
height=14, width = 28, dpi=400)
Heatmap
######################################################
# HEATMAP OF RATIO OF ESTIMATED TO OBSERVED
######################################################
heatmap_dat <- corrected %>%
filter(version=="v2") %>%
group_by(biweek, county_name, exp_cases_median, positive) %>%
summarize(date = min(date)) %>%
ungroup()
## `summarise()` has grouped output by 'biweek', 'county_name',
## 'exp_cases_median'. You can override using the `.groups` argument.
# group similar states together
wide <- heatmap_dat %>%
mutate(ratio= exp_cases_median/positive) %>%
select(-c(exp_cases_median, positive)) %>%
pivot_wider(names_from=county_name,values_from =ratio)
heatmap_dat %>%
mutate(ratio=exp_cases_median/positive) %>%
mutate(date_lab = format(as.POSIXct(date),format="%b %Y")) %>%
group_by(county_name) %>%
mutate(m = median(ratio, na.rm=TRUE)) %>%
ungroup() %>%
mutate(ratio=ifelse(ratio>40, 40, ratio)) %>%
ggplot(aes(x=fct_reorder(date_lab, date),
y = fct_reorder(county_name,m),
fill =ratio)) +
geom_tile() +
# scale_x_discrete(breaks=breaks_date, labels = breaks_date) +
viridis::scale_fill_viridis(option="rocket", direction=-1, na.value="white",
breaks = c(10,20,30,40), labels = c("10","20","30","40 or greater")) +
labs(x="",
y= "County",
fill = "Ratio of Estimated Cases\nto Observed",
title = "Ratio of Median Estimated Infections to Observed Infections\nfor Counties in Michigan",
subtitle = TeX("Implementation with $P(S_1|untested)$ Centered at Survey Value")) +
theme_c() +
theme(axis.text.x = element_text(size=15, angle=30),
axis.text.y = element_text(size = 17),
axis.title.x = element_text(size=19),
axis.title.y = element_text(size=19),
legend.title = element_text(face="bold", size=20, margin=margin(0,0,9,0)),
plot.title =element_text(size=22),
plot.subtitle = element_text(size=20))

# which biweeks are highest?
heatmap_dat %>%
mutate(ratio=exp_cases_median/positive) %>%
group_by(biweek) %>%
summarize(m=median(ratio),
date=min(date)) %>%
arrange(desc(m))
ggsave(here('thesis/figure/mi_county_heatmap_ratio_est_observed.pdf'),
height=18,
width =13,
dpi=400)
Summarizing Concordance with Covidestim
By Version
mi_concordance <- joined %>%
filter(biweek >=6) %>%
# covidestim does not report estimates for Nantucket
filter(!grepl("Nantucket", county_name )) %>%
select(-date) %>%
distinct() %>%
select(exp_cases_lb, exp_cases_ub, county_name, biweek, vlabel, infections) %>%
distinct() %>%
mutate(in_interval = case_when(
exp_cases_lb <= infections & infections <= exp_cases_ub ~ "In Interval",
exp_cases_lb > infections ~ "Below Interval",
exp_cases_ub < infections ~ "Above Interval"
) ) %>%
group_by(in_interval, county_name, vlabel) %>%
summarize(n_per_county=n()) %>%
group_by(vlabel, in_interval) %>%
summarize(n_per_version = sum(n_per_county)) %>%
group_by(vlabel) %>%
mutate(total = sum(n_per_version)) %>%
ungroup() %>%
mutate(prop=n_per_version/total)%>%
group_by(vlabel) %>%
mutate(m =prop[which(in_interval=="In Interval")]) %>%
ungroup()
## `summarise()` has grouped output by 'in_interval', 'county_name'. You can
## override using the `.groups` argument.
## `summarise()` has grouped output by 'vlabel'. You can override using the
## `.groups` argument.
labels <- mi_concordance %>%
arrange((m)) %>%
pull(vlabel) %>%
unique() %>%
as.character()
mi_concordance %>%
ggplot(aes(y =fct_reorder(vlabel,m),
x = prop,
fill=in_interval)) +
#facet_wrap(~fct_reorder((vlabel),m), ncol=1) +
geom_bar(stat="identity",
position="dodge",
color="darkgray",
linewidth=.2) +
# viridis::scale_fill_viridis(option="mako", begin=.2, discrete=TRUE) +
theme_c(legend.position="right",
legend.title = element_text(face="bold", size = 15),
legend.text= element_text( size = 15),
legend.spacing.y = unit(3, 'pt'),
axis.text.y = element_text(size = 17, hjust=1)) +
scale_y_discrete(labels=unname(TeX(labels))) +
labs(y ="",
x="Proportion",
fill = "Covidestim Median:") +
scale_fill_manual(values=c("In Interval" = "#79D2AF",
"Above Interval" = "#D2AF79",
"Below Interval"="#7997D2")) +
guides(fill=guide_legend(byrow=TRUE)) +
scale_x_continuous(n.breaks = 7,
expand=c(0,0),
limits=c(-.05,1))

By County
#######################################################################
# CONCORDANCE BY ABOVE, BELOW, WITHIN FOR MOST CONCORDANT VERSION
#######################################################################
mi_concordance_by_county <- joined %>%
filter(biweek >=6) %>%
# covidestim does not report estimates for Nantucket
select(-date) %>%
mutate(county_name = gsub("$", "", county_name, fixed=TRUE)) %>%
distinct() %>%
select(exp_cases_lb, exp_cases_ub, county_name, biweek, vlabel, infections) %>%
mutate(in_interval = case_when(
exp_cases_lb <= infections & infections <= exp_cases_ub ~ "In Interval",
exp_cases_lb > infections ~ "Below Interval",
exp_cases_ub < infections ~ "Above Interval"
) ) %>%
group_by(in_interval, county_name, vlabel) %>%
summarize(n_per_county=n()) %>%
group_by(county_name,vlabel) %>%
mutate(total = sum(n_per_county),
prop = n_per_county/total) %>%
mutate(state="MI")%>%
group_by(county_name)
## `summarise()` has grouped output by 'in_interval', 'county_name'. You can
## override using the `.groups` argument.
#########################################################
# CONCORDANCE BY COUNTY FOR MOST CONCORDANT VERSION
#########################################################
# mi_concordance_by_county %>%
# ggplot(aes(y = fct_reorder(county_name, max_prop),
# fill = type,
# x= max_prop)) +
# geom_bar(stat="identity",position="dodge", alpha=.8)+
# scale_fill_manual(values = pal,labels=TeX(names(pal))) +
# theme_c(legend.position="right",
# legend.text=element_text(hjust=0, size = 15),
# axis.title=element_text(size=16),
# axis.text.x=element_text(size=11),
# axis.text.y = element_text(size=14)) +
# labs(y = "", x="Proportion Containing Covidestim Median",
# fill="")
#########################################################
# CONCORDANCE BY COUNTY (NOT VERSION)
#########################################################
best_by_county <- mi_concordance_by_county %>%
group_by(county_name) %>%
filter(in_interval=="In Interval" )%>%
summarize(vlabel = vlabel[which.max(prop)],
m=max(prop))
mi_concordance_by_county %>%
inner_join(best_by_county) %>%
ggplot(aes(x= fct_reorder(county_name,m),
fill = in_interval, y =prop)) +
geom_bar(stat="identity",position="dodge") +
theme_c() +
coord_flip()+
scale_fill_manual(values=c("In Interval" = "#79D2AF",
"Above Interval" = "#D2AF79",
"Below Interval"="#7997D2")) +
labs(x="",
y= "Proportion",
fill = "Covidestim Median:",
title = "Proportion Where Covidestim Median\nWas Within, Above, or Below the Median",
subtitle = "For Implementation with Greatest Concordance")+
theme_c(legend.position="right",
legend.title = element_text(face="bold", size = 15),
legend.text= element_text( size = 15),
legend.spacing.y = unit(3, 'pt'),
axis.text.y = element_text(size = 17, hjust=1))
## Joining with `by = join_by(county_name, vlabel)`

ggsave(here('presentation/figure/covidestim_above_below_by_county_mi.pdf'), height=8,width=11)
##############################################
# stacked bar chart of concordance by county
##############################################
mi_concordance_by_county %>%
filter(vlabel=="$P(S_1|untested)$ Centered at Emp. Value") %>%
group_by(county_name) %>%
mutate(m = prop[which(in_interval=="In Interval")]) %>%
ungroup() %>%
mutate(in_interval = factor(in_interval, levels = c("Above Interval",
"In Interval", "Below Interval"))) %>%
ggplot(aes(x= fct_reorder(county_name,m),
fill = (in_interval), y =prop)) +
geom_bar(stat="identity",position="stack") +
theme_c() +
coord_flip()+
scale_fill_manual(values=c("In Interval" = "#79D2AF",
"Above Interval" = "#D2AF79",
"Below Interval"="#7997D2")) +
labs(x="",
y= "Proportion",
fill = "Covidestim Median:",
title = "Michigan: Proportion Where Covidestim Median\nWas Within, Above, or Below the Median",
subtitle = TeX("For Implementation with $P(S_1|untested)$ Centered at Survey Value")) +
theme_c() +
theme(legend.position="right",
legend.title = element_text(face="bold", size = 15),
legend.text= element_text( size = 15),
legend.spacing.y = unit(3, 'pt'),
axis.text.y = element_text(size = 17, hjust=1))

ggsave(here('thesis/figure/covidestim_above_below_by_county_mi.pdf'),height=7, width=12)
mi_concordance_by_county<- mi_concordance_by_county %>%
mutate(state="MI")
ma_concordance_by_county <- ma_concordance_by_county %>%
mutate(state="MA")
#
# ma_concordance_by_county %>%
# bind_rows(mi_concordance_by_county) %>%
# ggplot(aes(x=prop)) +
# geom_histogram(bins=15) +
# facet_grid(state~vlabel, scales="free_y")
#########################################################
# CONCORDANCE BY COUNTY (NOT VERSION)
#########################################################
best_by_county <- mi_concordance_by_county %>%
group_by(county_name) %>%
filter(in_interval=="In Interval" )%>%
summarize(vlabel = vlabel[which.max(prop)])
mi_concordance_by_county %>%
inner_join(best_by_county) %>%
ggplot(aes(x= fct_reorder(county_name,m),
fill = in_interval, y =prop)) +
geom_bar(stat="identity",position="dodge") +
theme_c() +
coord_flip()+
scale_fill_manual(values=c("In Interval" = "#79D2AF",
"Above Interval" = "#D2AF79",
"Below Interval"="#7997D2")) +
labs(x="",
y= "Proportion",
fill = "Covidestim Median:",
title = "Proportion Where Covidestim Median\nWas Within, Above, or Below the Median",
subtitle = "For Implementation with Greatest Concordance")+
theme_c(legend.position="right",
legend.title = element_text(face="bold", size = 15),
legend.text= element_text( size = 15),
legend.spacing.y = unit(3, 'pt'),
axis.text.y = element_text(size = 12, hjust=1))
ggsave(here('presentation/figure/covidestim_above_below_by_county_mi.pdf'), height=15,width=11)
#########################################################
# CONCORDANCE BY COUNTY (MOST CONC. VERSION)
#########################################################
mi_concordance_by_county %>%
filter(in_interval=="In Interval") %>%
group_by(county_name) %>%
summarize(max_prop =max(prop),
type = vlabel[which.max(prop)]) %>%
ungroup() %>%
ggplot(aes(y = fct_reorder(county_name, max_prop),
fill = type,
x= max_prop)) +
geom_bar(stat="identity",position="dodge", alpha=.8)+
scale_fill_manual(values = pal,labels=TeX(names(pal))) +
theme_c(legend.position="right",
legend.text=element_text(hjust=0, size = 15),
axis.title=element_text(size=16),
axis.text.x=element_text(size=11),
axis.text.y = element_text(size=14)) +
labs(y = "", x="Proportion Containing Covidestim Median",
fill="")

ggsave(here('presentation/figure/best_version_by_county_mi.pdf'), height=8,width=11)
concordance_both <- mi_concordance %>%
bind_rows(ma_concordance) %>%
group_by(vlabel, in_interval) %>%
summarize(n_per_version_both = sum(n_per_version)) %>%
group_by(vlabel) %>%
mutate(total = sum(n_per_version_both)) %>%
mutate(prop = n_per_version_both/total) %>%
mutate(m = n_per_version_both[which(in_interval=="In Interval")])
## `summarise()` has grouped output by 'vlabel'. You can override using the
## `.groups` argument.
labels <- concordance_both %>%
arrange((m)) %>%
pull(vlabel) %>%
unique() %>%
as.character()
concordance_both %>%
ggplot(aes(y =fct_reorder(vlabel,m),
x = prop,
fill=in_interval)) +
#facet_wrap(~fct_reorder((vlabel),m), ncol=1) +
geom_bar(stat="identity",position="dodge", color="darkgray", linewidth=.2) +
# viridis::scale_fill_viridis(option="mako", begin=.2, discrete=TRUE) +
theme_c(legend.position="right",
legend.title = element_text(face="bold", size = 15),
legend.text= element_text( size = 15),
legend.spacing.y = unit(3, 'pt'),
axis.text.y = element_text(size = 17, hjust=1)) +
scale_y_discrete(labels=unname(TeX(labels))) +
labs(y ="",
title = "Proportion of Observations Where Covidestim Medians\nAre Within, Above, or Below Probabilistic Bias Intervals",
x="Proportion",
fill = "Covidestim Median:\n") +
scale_fill_manual(values=c("In Interval" = "#79D2AF",
"Above Interval" = "#D2AF79",
"Below Interval"="#7997D2")) +
guides(fill=guide_legend(byrow=TRUE)) +
scale_x_continuous(n.breaks = 7, expand=c(0,0),limits=c(-.05,1))

ggsave(here('presentation/figure/prop_contained.pdf'), width=14, height=8, dpi=400)